# Remote zip archive to download.
allsites_zip_url <- "http://academic.udayton.edu/kissock/http/Weather/gsod95-current/allsites.zip"

# Specifying a data directory:
data_dir <- path("datas")
# The download below writes into this directory, so make sure it exists.
# dir_create() leaves an existing directory untouched.
dir_create(data_dir)

# Local path the archive is saved to.
allsites_zip_path <- path(data_dir, "allsites", ext = "zip")
allsites_zip_path
## datas/allsites.zip

# Only download when there is no local copy yet.
if (!file_exists(allsites_zip_path)) {
  allsites_zip_url %>%
    curl_download(destfile = allsites_zip_path)
}

# After the block above, the archive is present ...
file_exists(allsites_zip_path)
## datas/allsites.zip
## TRUE

# ... so the download condition is now FALSE and a re-run skips the download.
!file_exists(allsites_zip_path)
## datas/allsites.zip
## FALSE
# If the file already exists at the download path, it is not downloaded again; it is downloaded only when it is missing.
# Extract the downloaded archive into the data directory.
allsites_zip_path %>%
  unzip(exdir = data_dir)
# This unzips the zip file into many separate text files.
# Path to one of the extracted text files.
data_file <- path(data_dir, "ALHUNTSV", ext = "txt")

# File name without its directory or extension.
data_file_name <- data_file %>%
  path_file() %>%
  path_ext_remove()

# First two characters of the file name (stringr positions are 1-based).
file_state <- data_file_name %>%
  str_sub(1, 2)
# Everything from the third character onwards.
file_city <- data_file_name %>%
  str_sub(3)

file_state
## [1] "AL"
file_city
## [1] "HUNTSV"

# Read the file, name its four columns, then attach the state and city taken
# from the file name and a date built from the Year/Month/Day columns.
alhuntsv <- data_file %>%
  read_table(col_names = c("Month", "Day", "Year", "temp")) %>%
  mutate(
    state = file_state,
    city = file_city,
    date = make_date(year = Year, month = Month, day = Day)
  )

#' Print the sum of two numbers
#'
#' @param number1,number2 Values to add.
#' @return The value of the final `cat()` call, i.e. `NULL`. The sum is only
#'   printed, never returned, so assigning the result of `add()` does not
#'   capture it.
add <- function(number1, number2) {
  result <- number1 + number2
  cat(number1, "plus", number2, "equals", result, "\n")
}

add(4, 6)
## 4 plus 6 equals 10
add(50, 500)
## 50 plus 500 equals 550
#' Print and return the sum of two numbers
#'
#' @param number1,number2 Values to add.
#' @return `number1 + number2`. The sentence describing the sum is written to
#'   the console with `cat()` as a side effect.
add_fix <- function(number1, number2) {
  result <- number1 + number2
  cat(number1, "plus", number2, "equals", result, "\n")
  # Make the sum the last expression so the caller receives it.
  result
}

add_result <- add_fix(4, 6)
## 4 plus 6 equals 10
#' Density plot of one column of `mpg`
#'
#' @param variable Unquoted name of a column of `mpg`, e.g. `hwy`.
#' @return A ggplot object with a single `geom_density()` layer.
mpg_density_plot <- function(variable) {
  # `{{ }}` passes the caller's bare column name through to aes().
  ggplot(data = mpg) +
    geom_density(mapping = aes(x = {{ variable }}))
}

mpg_density_plot(hwy)
mpg_density_plot(cty)
mpg_density_plot(displ)

#' Read one temperature text file into a data frame
#'
#' The file name (without directory and extension) supplies two extra
#' columns: its first two characters become `state` (the two-letter
#' state/country code) and the remaining characters become `city`.
#'
#' @param data_file Path to a text file with four whitespace-separated
#'   columns, read as `Month`, `Day`, `Year`, `temperature`.
#' @return The file contents with added `state`, `city` and `date` columns,
#'   where `date` is built from `Year`, `Month` and `Day`.
read_data_file <- function(data_file) {
  file_name <- data_file %>%
    path_file() %>%
    path_ext_remove()

  # stringr positions are 1-based.
  file_state <- str_sub(file_name, 1, 2)
  file_city <- str_sub(file_name, 3)

  data_file %>%
    read_table(col_names = c("Month", "Day", "Year", "temperature")) %>%
    mutate(
      state = file_state,
      city = file_city,
      date = make_date(year = Year, month = Month, day = Day)
    )
}

alhuntsv2 <- read_data_file(data_file)

# Every .txt file in the data directory.
data_files <- data_dir %>%
  dir_ls(glob = "*.txt")
# Read every file and stack the results into one data frame, then turn
# temperatures of -99 into NA. near() compares with a small tolerance rather
# than exact floating-point equality.
temperature_df <- data_files %>%
  map_dfr(read_data_file) %>%
  mutate(temperature = if_else(near(temperature, -99), NA_real_, temperature))
# This removes outliers or typos that could otherwise skew the data heavily.
# NOTE(review): -99 is presumably the data set's missing-value code - confirm.
# Keep rows that have a temperature and are not from 2018.
# NOTE(review): 2018 is presumably dropped because the year is incomplete -
# confirm.
temperature_df_filtered <- temperature_df %>%
  filter(!is.na(temperature), Year != 2018)

# Rows for the city code "WASHDC" only.
washdc <- temperature_df_filtered %>%
  filter(city == "WASHDC")

# Temperature against date, one point per row.
washdc %>%
  ggplot(mapping = aes(x = date, y = temperature)) +
  geom_point()
# The constant oscillation is due to the change of seasons: it becomes colder
# in winter and warmer in summer.
# Same rows as `washdc`, grouped by year. group_by() only adds grouping
# metadata; the rows themselves are unchanged.
washdc_year <- washdc %>%
  group_by(Year)

# Temperature against year, with a smoothed trend line.
washdc_year %>%
  ggplot(mapping = aes(x = Year, y = temperature)) +
  geom_point() +
  geom_smooth()

# The same plot with the axes swapped (temperature on x, year on y).
washdc_year %>%
  ggplot(mapping = aes(x = temperature, y = Year)) +
  geom_point() +
  geom_smooth()